package com.crawler.waf.serializer;

import java.io.IOException;
import java.util.regex.Pattern;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;

/**
 * Serializer for HTML content.
 *
 * <p>Takes an HTML string (a fragment or a complete document), keeps only what is
 * inside its {@code <body>} element when one is present, normalizes {@code <img>}
 * tags so that images span the full width, wraps the result in a fixed page
 * template and writes that page as a single JSON string.
 *
 * @author rubekid
 *
 * 2017-07-05 14:34:35
 */
public class HtmlSerializer extends JsonSerializer<String> {

	/**
	 * Matches a whole document and captures the content of its {@code <body>} element.
	 * The capture group may be empty so that {@code <body></body>} is unwrapped too
	 * instead of being nested inside the generated page.
	 */
	private static final Pattern BODY_WRAPPER =
			Pattern.compile("[\\s\\S]*<body[^>]*>([\\s\\S]*)</body>[\\s\\S]*");

	/**
	 * Matches an {@code <img>} tag carrying a double-quoted {@code src} attribute and
	 * captures the attribute value. The value may not contain a quote and the rest of
	 * the tag may not contain {@code >}, so a match never extends beyond the tag itself.
	 */
	private static final Pattern IMG_WITH_SRC =
			Pattern.compile("<img[^>]+src=\"([^\"]*)\"[^>]*>");

	/** Matches any {@code <img ...>} tag and captures everything after {@code <img}. */
	private static final Pattern IMG_TAG = Pattern.compile("<img([^>]+)>");

	/** Fixed markup emitted before the processed content. */
	private static final String PAGE_PREFIX = "<html>" +
			"<head>" +
			"<meta charset=\"utf-8\" />" +
			"<meta name=\"viewport\" content=\"width=device-width,minimum-scale=1.0,maximum-scale=1.0,user-scalable=no\"/>" +
			"<title>爬虫科技有限公司</title>" +
			"</head>" +
			"<body>";

	/** Fixed markup emitted after the processed content. */
	private static final String PAGE_SUFFIX = "</body>" +
			"</html>";

	/**
	 * Writes {@code value} as a JSON string containing a complete HTML page.
	 *
	 * @param value       HTML fragment or document to process; {@code null} is written as JSON null
	 * @param gen         generator the resulting string is written to
	 * @param serializers serializer provider (not used)
	 * @throws IOException if the generator fails to write the output
	 */
	@Override
	public void serialize(String value, JsonGenerator gen, SerializerProvider serializers)
			throws IOException, JsonProcessingException {

		// Defensive: a null value would otherwise fail with a NullPointerException
		// in the regex processing below.
		if (value == null) {
			gen.writeNull();
			return;
		}

		String body = normalizeImages(extractBody(value));
		gen.writeString(PAGE_PREFIX + body + PAGE_SUFFIX);
	}

	/** Strips the surrounding HTML document, leaving only the content of the body element. */
	private static String extractBody(String html) {
		return BODY_WRAPPER.matcher(html).replaceAll("$1");
	}

	/** Reduces img tags to their src attribute and forces them to full width. */
	private static String normalizeImages(String html) {
		// First drop every attribute except src, then prepend the full-width style.
		String srcOnly = IMG_WITH_SRC.matcher(html).replaceAll("<img src=\"$1\" />");
		return IMG_TAG.matcher(srcOnly).replaceAll("<img style=\"width:100%;\" $1>");
	}

}